library(ggthemes)
library(Hmisc)
library(gridExtra)
library(GGally)
library(tibble)
library(dplyr)
library(tidyr)
library(reshape2)
library(tidyverse)
library(ggplot2)
library(corrgram)
library(corrplot)
library(date)
library(readxl)
library(knitr)Setting theme
theme_set(theme_economist())In this project, we are analyzing the BIST30 members as of November 2018. The data set consist of last price ad volume data of equities listed in BIST30 Index from January 2011 to November 2018. Firstly, we have examined the data structure and, made some little transformation in the columns of data frame.
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/gpj18-data-jugglers/blob/master/XU030_v2.xlsx?raw=true",destfile=tmp,mode='wb')
bist30=read_xlsx(tmp)str(bist30$Dates)
range(bist30$Dates)
head(bist30)
tail(bist30)Normalization: Before starting beta analysis, we should normalize the data as we take date 01.01.2011 equals to 100 for all stocks’ price.
tmp<-tempfile(fileext=".xlsx")
download.file("https://github.com/MEF-BDA503/gpj18-data-jugglers/blob/master/XU030_v2.xlsx?raw=true",destfile=tmp,mode='wb')
bist30=read_xlsx(tmp)
bist30 <- cbind(bist30, apply(bist30[seq(2,60,2)], 2, function (a) a / a[[1]] * 100))
colnames(bist30)[62:91] <- paste(colnames(bist30[seq(2,60,2)]), "norm", sep = "_")
head(bist30[62:90],3)
norms <- bist30[,62:91]We will divide the data into two groups, financial sector companies, reel sector companies. Then we will compare two groups according to the annual yields, and illustrate them by graphics for each year.
Corrplot is a good source for correlation table, we can see the desired relations between two values in table by looking the correlation table. In this graph we use normalized data set.
“TASK1: Buraya korelasyon matrisi eklenecek”"
norms <- bist30[62:90]
corrValuesPrices <- cor(norms,norms)We need company names after
companyNames <- c('GARAN','SAHOL','BIMAS','ARCLK','EREGL','YKBNK','TOASO','TUPRS','KRDMD','TTRAK','ASELS','IPEKE','KOZAA','ASYAB','ENKAI','KOZAL','DOHOL','IHLAS','KCHOL','EKGYO','TCELL','MGROS','SISE','ISCTR','VAKBN','AKBNK','PETKM','THYAO','TTKOM','HALKB')corrplot(corrValuesPrices, method = "number")corrplot(corrValuesPrices, type = "upper", tl.pos = "td",
method = "circle", tl.cex = 0.5, tl.col = 'black',
order = "hclust", diag = FALSE)“TASK2: Hacim ve fiyat farkları alınarak grafikler güncellenecek.
# First I take the first 2055 row of prices then last 2055
prices1 <- head(bist30,n = 2055) %>% select(matches("price"))
prices2 <- tail(bist30,n = -1) %>% select(matches("price"))
# After, I divide the last 2055 to first 2055 which are the one before values.
pricesRat <- prices2 / prices1
pricesRat <- rowSums(pricesRat) / 30
# With rowsums / 30 I take the average of total values.
volume1 <- head(bist30,n = 2055) %>% select(matches("volume"))
volume2 <- tail(bist30,n = -1) %>% select(matches("volume"))
volumeRat <- volume2 / volume1
volumeRat <- rowSums(volumeRat) / 30
dates <- bist30$Dates[2:2056]
ratTable <- data.frame(dates,pricesRat,volumeRat)
head(ratTable)ggplot(ratTable,aes(x = dates,y = 0))+
geom_line(aes(y = pricesRat, colour = "pricesRat")) +
geom_line(aes(y = volumeRat, colour = "volumeRat")) +
xlab("Years") + ylab("Ratio Values of Volume and Price")print(cor(ratTable$pricesRat,ratTable$volumeRat))Quantmod provides a number of useful features for financial modelling.
if (!require("quantmod")) {
install.packages("quantmod")
library(quantmod)
}## Loading required package: quantmod
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
##
## Attaching package: 'quantmod'
## The following object is masked from 'package:Hmisc':
##
## Lag
start <- as.Date("2011-01-03")
end <- as.Date("2013-01-03")
bist30 <- bist30 %>% filter(
Dates >= start, Dates <= end
)
a = bist30["GARAN_Price_norm"]
head(a)library(xts)
library(dygraphs)start <- as.Date("2011-01-03")
end <- as.Date("2013-01-03")
which(bist30$Dates == start)
which(bist30$Dates == end)We could buy a stock in 15 days and observe the first 3 months to sell the stock in a one year. So what might change in this period?
first3Month1 <- bist30[1:15,] %>% summarise_all(mean) %>%
select(matches("norm"))
first3Month2 <- bist30[75:90,] %>% summarise_all(mean) %>%
select(matches("norm"))
year1 <- bist30[1:90,] %>% summarise_all(mean) %>%
select(matches("norm"))
year2 <- bist30[275:365,] %>% summarise_all(mean) %>%
select(matches("norm"))
quarter <- first3Month2 - first3Month1
year <- year2 - year1
quarter <- as.data.frame(t(quarter))
year <- as.data.frame(t(year))
total <- data.frame(companyNames,quarter,year)
names(total) <- c("companyNames","quarterDif","yearDif")
totalp1 <- total %>% arrange(quarterDif)
p2 <- total %>% arrange(yearDif)x <- colSums(year)
xFirst three month difference in stocks.
ggplot(p1,aes(x = reorder(companyNames, quarterDif),quarterDif,fill = companyNames)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + coord_flip()Yearly difference of stocks
ggplot(p1,aes(x = reorder(companyNames, yearDif),yearDif,fill = companyNames)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + coord_flip()cor(total$quarterDif[1:5],total$yearDif[1:5])cor(total$quarterDif[8:30],total$yearDif[8:30])Roughly we can say the head of changing stocks are competative in stock market but others are nearly stable.
We can divide the differences to look for what changes may be beneficial
difRatio <- total$yearDif / total$quarterDif
difRatio <- data.frame(companyNames,difRatio)
ggplot(difRatio,aes(x = reorder(companyNames, difRatio),difRatio,fill = companyNames)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + coord_flip()We have divided the dataset into two groups as financial sector companies and real sector companies. Now we will check whether this classification is true or not by using k-means.
#we calculate the annual average of stocks.
annual_average<- aggregate(bist30[,62:91], list(format(bist30$Dates, "%Y")), mean)
View(annual_average)
#get transpose of annual_average dataset.
transpose <- as.matrix(annual_average[,-1])
rownames(transpose) <- annual_average[,1]
annual_average <- t(transpose)
View(annual_average)
#lets define wcss(Within cluster sum of squares) vector.
wcss <- vector()
#lets find the distance to the center for each predefined center numbers.
for (i in 1:7) wcss[i] <- sum(kmeans(annual_average, i)$withinss)
View(wcss)
#lets determine the breakdown point of the wcss graph.
plot(1:7, wcss, type="l")#3 centers are the optimum number for centers.
kmeans(annual_average, 3)Structuring the table for kmeans.
TASK3: Aşağıdaki kod çalıştırılacak, çalışırsa bir sonrakichunk kaldırılacak.
bist30_cluster <- kmeans(annual_average,centers=3)
rownames(annual_average)
bist30_cluster$cluster
##Get the clusters
results=data.frame(stocks=names(bist30_cluster$cluster), cluster_stocks=bist30_cluster$cluster) %>% arrange(cluster_stocks,stocks)
results[,1]c1 <- companyNames
c2 <- c(3,3,2,2,2,3,2,2,2,2,1,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,2,3,3)
clusterFrame <- melt(data.frame(c1,c2))## Using c1 as id variables
clusterFrame$variable <- NULL
names(clusterFrame) <- c("Companies","Groups")| Companies | Groups |
|---|---|
| GARAN | 3 |
| SAHOL | 3 |
| BIMAS | 2 |
| ARCLK | 2 |
| EREGL | 2 |
| YKBNK | 3 |
| TOASO | 2 |
| TUPRS | 2 |
| KRDMD | 2 |
| TTRAK | 2 |
| ASELS | 1 |
| IPEKE | 3 |
| KOZAA | 3 |
| ASYAB | 3 |
| ENKAI | 3 |
| KOZAL | 3 |
| DOHOL | 3 |
| IHLAS | 3 |
| KCHOL | 2 |
| EKGYO | 3 |
| TCELL | 3 |
| MGROS | 3 |
| SISE | 2 |
| ISCTR | 3 |
| VAKBN | 3 |
| AKBNK | 3 |
| PETKM | 2 |
| THYAO | 2 |
| TTKOM | 3 |
| HALKB | 3 |
annual_average <- as.data.frame(annual_average)
annual_averageplot(annual_average, col = bist30_cluster$cluster)
points(bist30_cluster$centers, col = 1:2, pch = 8, cex = 2)data(annual_average)## Warning in data(annual_average): data set 'annual_average' not found
dissE <- daisy(annual_average)
dE2 <- dissE^2
sk2 <- silhouette(bist30_cluster$cl, dE2)
plot(sk2)fviz_cluster(bist30_cluster, data = annual_average)TASK4 Önce distance lara göre dersteki grafik denenecek, olmadı sort edilip grafik güncellenecek.
ggplot(clusterFrame, aes(x =Groups , y = seq(1,30,1), colour = Groups,fill = Groups)) + geom_text(label = clusterFrame$Companies) +
xlab(label = "Numbers of Groups") +
ylab(label = "Seperation according to Sequence") \[\frac{betweenSS}{totalSS} = 87.5 \]
Consequently, we can assume Aselsan as an outlier, so the result of k-means match up with our first assumption (there are two groups in bist30 stock such as real sector and financial sector) with regards to group classification.
We can measure the change in prices by looking last and first 3 months average differences. The differences between last and first can show us what the yileds would be beneficial if we purchase these stocks at 2011.
TASK5 momentum geçen yerler yield olarak güncellenecek, grafik altındaki rakamlar yüzdeye çevrilecek.
df1 <- head(bist30,n= 90)
df2 <- tail(bist30,n= 90)
averageFirst <- df1[0:61] %>% summarise_at(names(df1[0:61]), mean, na.rm = TRUE) %>%
select(matches("price"))
averageLast <- df2[0:61] %>% summarise_at(names(df2[0:61]), mean, na.rm = TRUE) %>%
select(matches("price"))
yields <- (averageLast - averageFirst) / averageFirst
yieldsFirst, we should combine the company names and yields
df_yields <- as.data.frame(t(yields))
yieldsTable <-melt(data.frame(companyNames ,df_yields))## Using companyNames as id variables
yieldsTable$variable <- NULL
yieldsTable$companyNames <- paste(yieldsTable$companyNames,"Momentum",sep= "&")
yieldsTable %>% arrange(desc(value))Lets visualize this
yieldsTable %>%
arrange(desc(value)) %>%
ggplot(data = ., aes(x = reorder(companyNames, value), y = value,
fill = companyNames, color = companyNames)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + coord_flip()We can find the max and min values of prices and calculate the measure of it. This will show us the a possible income for investor how to reach a maxiumum profit.
TASK 6 her bir hissenin max ve min tarihleri tablo olarak verilecek, min tarihi max tan büyük olanlar varsa hem tabloda hem de grafikten bu hisseleri çıkar.
maxVal <- bist30[1028:2056,] %>% summarise_all("max") %>%
select(matches("norm"))
minVal <- bist30[0:1028,] %>% summarise_all("min") %>%
select(matches("norm"))
maxmin <- (maxVal-minVal) / minValFirst we should look at min and max values dates to assure that max valua is later than min value.
dim(bist30)Then, we can turn this into a usable table
df_maxmin <- as.data.frame(t(maxmin))
maxminTable <-melt(data.frame(companyNames ,df_maxmin))## Using companyNames as id variables
maxminTable$variable <- NULL
maxminTable$companyNames <- paste(maxminTable$companyNames,"MaxminRange",sep= "&")
maxminTable %>% arrange(desc(value))Visualization of this
maxminTable %>%
arrange(desc(value)) %>%
ggplot(data = ., aes(x = reorder(companyNames, value), y = value,
fill = companyNames, color = companyNames)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + coord_flip()## Warning: Removed 30 rows containing missing values (position_stack).
TAKS7: Economic Volume ifadeleri yerine Traded Value yazılacak
The geometric average of volume and price can show us the traded value of companies. Thus, we can calculate this value and compare the companies by this.
df_volume <- bist30[0:61] %>% summarise_at(names(bist30[0:61]), mean, na.rm = TRUE) %>%
select(matches("Volume"))
df_prices <- bist30[0:61]%>% summarise_at(names(bist30[0:61]), mean, na.rm = TRUE) %>%
select(matches("Price"))
volumes <- as.data.frame(t(df_volume))
prices <- as.data.frame(t(df_prices))
tradedValue <- data.frame(companyNames,sqrt(volumes*prices))Visualization of this data frame
tradedValue %>%
arrange(desc(V1)) %>%
ggplot(data = ., aes(x = reorder(companyNames, V1), y = V1,
fill = companyNames, color = companyNames)) + geom_bar(stat = 'identity') + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + coord_flip()TAKS9: Değerlendir, çıkarabilirsin
cor(economicValue,momentumTable)
Correlation: \(-0.03551822\)
The information about companies and their stocks’ purchase has a correlation nearly -0.1 which is too low to guess about it. That’s why, economic values of companies are not good parameter for price predictions.
First of all, we select the 6 banks from banking sector
names(bist30)
bankStocks <- c("GARAN_Price_norm","VAKBN_Price_norm","AKBNK_Price_norm","YKBNK _Price_norm","ASYAB_Price_norm","HALKB_Price_norm","ISCTR_Price_norm")Selecting from bist30 data table as made below
newbist30 <- bist30[,62:91]
selectedComp1 <- newbist30[,bankStocks]
selectedComp2 <- newbist30[ , !(names(newbist30) %in% bankStocks)]
totalComp <- newbist30 %>% select(matches("norm"))
selectedComp2Afer that, transpozing the vales help to summarise the values.
selectedComp2 <- as.data.frame(t(selectedComp2))
selectedComp1 <- as.data.frame(t(selectedComp1))
totalComp <- as.data.frame(t(totalComp))
names(selectedComp2) <- bist30$Dates
names(selectedComp1) <- bist30$Dates
names(totalComp) <- bist30$Dates
selectedComp1As you can see we summarise the values of normalized prices
meanVal1 <- selectedComp1 %>% summarise_all(mean)
meanVal2 <- selectedComp2 %>% summarise_all(mean)
Totalmean <- totalComp %>% summarise_all(mean)Then,naming and again transposing data to visualize in normal form
newValue1<- as.data.frame(t(meanVal1))
newValue2 <- as.data.frame(t(meanVal2))
totalValue <- as.data.frame(t(Totalmean))
names(newValue1) <- "Prices1"
names(newValue2) <- "Prices2"
names(totalValue) <- "TotalPrices"Merging this data tables helps us to generate a mean table for sector and general companies.
CompanyPrices <-data.frame(bist30$Dates,newValue1$Prices1,newValue2$Prices2,totalValue$TotalPrices)
names(CompanyPrices) <- c("Dates","bankSectorPrices","OtherPrices","TotalPrices")
CompanyPricesThe normalized prices of groups to the years
ggplot(CompanyPrices, aes(x = Dates)) +
geom_line(aes(y = bankSectorPrices, colour = "bankSectorPrices")) +
geom_line(aes(y = OtherPrices, colour = "OtherPrices")) +
geom_line(aes(y = TotalPrices, colour = "TotalPrices")) +
xlab("Years") + ylab("Normalized Prices")After all, banking sector is below of the other sectors by means of price values.
TASK 10: Ağırlıklandırmaa bankacılık ve reel sektör için bakılacak
prices <- bist30[62:91] %>%
select(matches("norm"))
volumes <- bist30[0:61] %>%
select(matches("Volume"))
priceVolumeProduct <- prices * volumesVolume Sums
TotalVolume <- rowSums(volumes)Weigthed Average
weigthedAverage <- priceVolumeProduct / TotalVolume
weigthedAverageChanging the names of columns
colnames(weigthedAverage) <- paste(companyNames,"wAvg",sep = "_")
weigthedAverageGrouping according to sectors.
bankStocksAvg <- c(24,14,25,26,30,1,6)In the below,we selected banking sectors and found the sums of groups. After that, collected them into a WavgTable data frame.
mainGroup <- weigthedAverage[,bankStocksAvg]
otherGroup <- weigthedAverage[ , c(-24,-14,-25,-26,-30,-1,-6)]
totalGroup <- weigthedAverage %>% select(matches("wAvg"))
mainGroup$Avg <- rowSums(mainGroup)
otherGroup$Avg <- rowSums(otherGroup)
totalGroup$Avg <- rowSums(totalGroup)
WAvgTable <- data.frame(bist30$Dates,mainGroup$Avg,otherGroup$Avg,totalGroup$Avg )We can visualize table as before
ggplot(WAvgTable, aes(x = bist30.Dates)) +
geom_line(aes(y = mainGroup.Avg, colour = "mainGroup.Avg")) +
geom_line(aes(y = otherGroup.Avg, colour = "otherGroup.Avg")) +
geom_line(aes(y = totalGroup.Avg, colour = "totalGroup.Avg")) +
xlab("Years") + ylab("Weighted Prices")Kmeans shows us which prices have highly momentum and which prices have lower momentum. Aselsan is the outlier because of its highly increase in price to the normalized value. Then, group 2 have high momenum then 3 group. We can deduce that, group 1 and group 2 stocks would be the best options for inverstors to get high income return.
ASELS and KRDMD have higher correlation with their volume which means they are related with their purchase willingleness of their stocks. We can assuma that, some inverstors predicted that their prices would get higher.
The price correlation shows that some investors can find relation between stocks which have highly momentum and which don’t require the highly but have correlation so that they predict others’ price changes. TUPRS & ASELS, ASELS & PETKM,EREGL & SISE,TOASO & KCHOL have higher momentum than average. EREGL has the a high momentum but SISE has not. However, some inverstor can predict SISE’s prices by EREGL and collect this stock in their portfolio.
The product of volume-price which is similary their geometric average could shows us the economic value of companies. For example, GARANTI BANK reached the most valuable BANK in 3. quarter in this year. This information shows the market share of companies similarly regarless of their price momentum. That’s why, investors use this information to predict price changes. However, If we look at
As it is mentioned, economic value of a company and the price changes are not correlated. That’s why, the information about these area not properly truthful for investment.
Another information is, TUPRAS A.S and other heavy industry companies have high economic value but stock market cannot show this value when we look at the geometric average, because there is a posibility that the rising companies market their values higher than normally high companies.
There are highly capable and successfull investors and other type investors. If higher investment capacity investors use their money in highly production companies, than TUPRAS result is not suprising. Another low capital investor just triggers the Volume and that’s why information about Garanti and others could be manuplative If we focus the price changes.
The banking sector is below the averages of other sectors such as heavy industry and similar company stocks. This is according to economic volume of companies and generally these banks belongs main companies which focuses on other sectors as KOC,SABANCI etc. These companies creates banks in order to supply their main job area. That’s why we cannot expect that the stocks of their banks could be higher than their heavy industry companies logically.
[1] - Correlation Graphs link
[2] - Correlation Graphs link
[3] - General Graphs link
[4] - Math formulas for R Markdown link
[5] - R Markdown Style link
[6] - Merging line plots link
[7] - Column drop options link
[8] - Summarising the total columns link
[9] - Row Summation link
[10] - Figure size editing link